By using loyalty and credit card data, we can observe that the top three most visited places during the 2 weeks data are:
As we can see in the figure below, the number of daily frequency from credit card data and loyalty data are different. Based on credit card data Katerina’s Cafe were most popular in 2014-06-01…
cc_freq <- group_by(cc_data, location) %>% summarise(freq = n())%>% arrange(desc(freq))
cc_freq
## # A tibble: 34 x 2
## location freq
## <chr> <int>
## 1 Katerina's Cafe 212
## 2 Hippokampos 171
## 3 Guy's Gyros 158
## 4 Brew've Been Served 156
## 5 Hallowed Grounds 92
## 6 Ouzeri Elian 87
## 7 Abila Zacharo 72
## 8 Gelatogalore 64
## 9 Kalami Kafenion 64
## 10 Bean There Done That 47
## # ... with 24 more rows
loyalty_freq <- group_by(loyalty_data, location) %>% summarise(freq = n())%>% arrange(desc(freq))
loyalty_freq
## # A tibble: 33 x 2
## location freq
## <chr> <int>
## 1 Katerina's Cafe 195
## 2 Hippokampos 155
## 3 Guy's Gyros 146
## 4 Brew've Been Served 140
## 5 Ouzeri Elian 84
## 6 Hallowed Grounds 80
## 7 Abila Zacharo 71
## 8 Kalami Kafenion 66
## 9 Gelatogalore 60
## 10 Bean There Done That 45
## # ... with 23 more rows
cc_freq_daily <- group_by(cc_data, timestamp_date, location) %>% summarise(freq = n())%>% arrange(desc(freq))
top3_place = c("Katerina's Cafe", "Hippokampos","Guy's Gyros")
cc_freq_daily_selected <- cc_freq_daily %>% filter(location %in% top3_place)
top3_vis_cc <- ggplot(cc_freq_daily_selected, aes(fill=as.character(timestamp_date), y=freq, x=location)) +
geom_bar(position="stack", stat="identity") +
geom_text(label = cc_freq_daily_selected$freq, position=position_stack(vjust=0.5))+
ggtitle("CC data top 3 location")
ggplotly(top3_vis_cc)
loyalty_freq_daily <- group_by(loyalty_data, timestamp, location) %>% summarise(freq = n())%>% arrange(desc(freq))
top3_place = c("Katerina's Cafe", "Hippokampos","Guy's Gyros")
loyalty_freq_daily_selected <- loyalty_freq_daily %>% filter(location %in% top3_place)
top3_vis_L <- ggplot(loyalty_freq_daily_selected, aes(fill=as.character(timestamp), y=freq, x=location)) +
geom_bar(position="stack", stat="identity") +
geom_text(label = loyalty_freq_daily_selected$freq, position=position_stack(vjust=0.5))+
ggtitle("Loyalty data top 3 location")
ggplotly(top3_vis_L)
##Question 2
gps_data <- read_csv("MC2/gps.csv")
gps_data$Timestamp_date <- date_time_parse(gps_data$Timestamp,
zone = "",
format = "%m/%d/%Y")
gps_data$Timestamp <- date_time_parse(gps_data$Timestamp,
zone = "",
format = "%m/%d/%Y %H:%M")
gps_data$Time <- format(gps_data$Timestamp, format = "%H:%M")
gps_data$Hour <- get_hour(gps_data$Timestamp)
gps_sort <- gps_data[order(gps_data$Timestamp, gps_data$id),]
stop_fin_2 = data.frame()
for (id_1 in unique(gps_sort$id))
{
make_stop = data.frame()
gps_id <- gps_sort %>%
filter(id==id_1)
for(i in 1:nrow(gps_id))
{
if(i < nrow(gps_id))
{
start <- gps_id$Timestamp[i]
end <- gps_id$Timestamp[i+1]
} else
{
start <- gps_id$Timestamp[i]
end <- gps_id$Timestamp[i]
print(start)
}
elapsed.time <- start%--%end
time_duration <- as.duration(elapsed.time)
if(time_duration > 300)
{
make_stop = rbind(make_stop,cbind(gps_id[i,],time_duration,i),cbind(gps_id[i+1,],time_duration,i))
}
}
stop_fin_2 = rbind(stop_fin_2, make_stop)
}
## [1] "2014-01-19 20:07:00 +08"
## [1] "2014-01-19 16:06:00 +08"
## [1] "2014-01-19 19:48:00 +08"
## [1] "2014-01-19 19:47:00 +08"
## [1] "2014-01-19 20:11:00 +08"
## [1] "2014-01-19 19:47:00 +08"
## [1] "2014-01-19 20:24:00 +08"
## [1] "2014-01-19 15:57:00 +08"
## [1] "2014-01-19 19:53:00 +08"
## [1] "2014-01-19 20:21:00 +08"
## [1] "2014-01-19 20:38:00 +08"
## [1] "2014-01-19 19:58:00 +08"
## [1] "2014-01-19 20:02:00 +08"
## [1] "2014-01-18 15:52:00 +08"
## [1] "2014-01-19 19:49:00 +08"
## [1] "2014-01-19 14:11:00 +08"
## [1] "2014-01-19 19:56:00 +08"
## [1] "2014-01-19 18:25:00 +08"
## [1] "2014-01-19 19:49:00 +08"
## [1] "2014-01-19 20:09:00 +08"
## [1] "2014-01-19 20:20:00 +08"
## [1] "2014-01-18 20:21:00 +08"
## [1] "2014-01-19 20:27:00 +08"
## [1] "2014-01-19 14:32:00 +08"
## [1] "2014-01-19 20:03:00 +08"
## [1] "2014-01-16 17:50:00 +08"
## [1] "2014-01-19 20:38:00 +08"
## [1] "2014-01-19 19:46:00 +08"
## [1] "2014-01-19 20:04:00 +08"
## [1] "2014-01-19 20:56:00 +08"
## [1] "2014-01-19 20:36:00 +08"
## [1] "2014-01-19 20:11:00 +08"
## [1] "2014-01-19 19:54:00 +08"
## [1] "2014-01-19 19:46:00 +08"
## [1] "2014-01-19 20:25:00 +08"
## [1] "2014-01-17 17:21:00 +08"
## [1] "2014-01-16 21:06:00 +08"
## [1] "2014-01-16 19:07:00 +08"
## [1] "2014-01-16 20:00:00 +08"
## [1] "2014-01-19 19:50:00 +08"
elapsed.time
## [1] 2014-01-19 19:50:00 +08--2014-01-19 19:50:00 +08
location_point = read_csv("MC2/stop_fin.csv") %>% select(lat,long,Possible_Location)%>% unique()
stop_fin_2$lat = round(stop_fin_2$lat,5)
stop_fin_2$long = round(stop_fin_2$long,5)
stop_final = merge(stop_fin_2,location_point, by.x = c("lat","long"), by.y = c("lat","long"), all.x = TRUE)
stop_fin <- stop_final
stop_fin_location <- stop_fin[!(stop_fin$Possible_Location %like% 'mice'),]
gps_sf_stopfin <- st_as_sf(stop_fin_location,
coords = c("long","lat"),
crs=4326)
gps_sf_stopfin_unique1 <- gps_sf_stopfin %>% select(Possible_Location,geometry)%>%unique()%>%arrange(Possible_Location)
gps_sf_stopfin_unique <- gps_sf_stopfin_unique1[-c(221,476,644,1100,1103),]
gps_path_stopfin <- gps_sf_stopfin_unique %>%
group_by(Possible_Location) %>%
summarize(m_Timestamp = mean(geometry),
do_union = FALSE ) %>%
st_cast("LINESTRING")
Abila_st <- st_read(dsn = "MC2/Geospatial",
layer = "Abila")
## Reading layer `Abila' from data source
## `C:\jovinkahartanto\assignment_distill\MC2\Geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3290 features and 9 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
## Geodetic CRS: WGS 84
P <- npts(Abila_st, by_feature = TRUE)
abila_st_2 <- cbind(Abila_st,P) %>% filter(P > 1)
bgmap <- raster("MC2/Geospatial/MC2-tourist.TIF")
tmap_mode("view")
tm_shape(bgmap) +
tm_rgb(bgmap, r = 1, g = 2, b = 3,
alpha = NA,
saturation = 1,
interpolate = TRUE,
max.value = 255) +
tm_shape(abila_st_2) +
tm_lines(col = "red", scale = 1)+
tm_shape(gps_path_stopfin[9,]) +
tm_lines(col = "blue", scale =4, interactive = TRUE) +
tm_text("Possible_Location", size = 3, remove.overlap = TRUE, overwrite.lines = TRUE, just = "top")
gps_data <- read_csv("MC2/gps.csv")
gps_data$Timestamp_date <- date_time_parse(gps_data$Timestamp,
zone = "",
format = "%m/%d/%Y")
gps_data$Timestamp <- date_time_parse(gps_data$Timestamp,
zone = "",
format = "%m/%d/%Y %H:%M")
gps_data$Time <- format(gps_data$Timestamp, format = "%H:%M")
gps_data$Hour <- get_hour(gps_data$Timestamp)
gps_sf <- st_as_sf(gps_data,
coords = c("long","lat"),
crs=4326)
gps_path <- gps_sf %>%
group_by(id, Timestamp_date,Hour) %>%
summarize(m_Timestamp = mean(Timestamp_date),
do_union = FALSE ) %>%
st_cast("LINESTRING")
Abila_st <- st_read(dsn = "MC2/Geospatial",
layer = "Abila")
## Reading layer `Abila' from data source
## `C:\jovinkahartanto\assignment_distill\MC2\Geospatial' using driver `ESRI Shapefile'
## Simple feature collection with 3290 features and 9 fields
## Geometry type: LINESTRING
## Dimension: XY
## Bounding box: xmin: 24.82401 ymin: 36.04502 xmax: 24.90997 ymax: 36.09492
## Geodetic CRS: WGS 84
P <- npts(Abila_st, by_feature = TRUE)
abila_st_2 <- cbind(Abila_st,P) %>% filter(P > 1)
gps_path_selected <- gps_path %>%
filter(id==28)
bgmap <- raster("MC2/Geospatial/MC2-tourist.TIF")
tmap_mode("view")
tm_shape(bgmap) +
tm_rgb(bgmap, r = 1, g = 2, b = 3,
alpha = NA,
saturation = 1,
interpolate = TRUE,
max.value = 255) +
tm_shape(abila_st_2) +
tm_lines(col = "red", scale = 1)+
tm_shape(gps_path_selected[1:4,]) +
tm_lines(col = "black", scale =4, interactive = TRUE) +
tm_text("Hour", size = 3, remove.overlap = TRUE, overwrite.lines = TRUE, just = "top")